

*This file produces the first-stage and reduced-form plots and computes the correlation between the exogenous productivities and the log consumer price indices (Figures B.1 and B.2)

*Start from a clean graph list and load the regression data
graph drop _all
use regression_data_sensitivity_rho.dta, clear

*Keep the 2010 cross-section and exclude ROW (originId 62) as an origin
keep if year==2010 & originId!=62

*One cluster per origin; used for clustered standard errors below
egen clusterId = group(originId)

************************************************************************************

*Collapsed version: average across importers, leaving one row per year-sector-origin cell
collapse (mean) logX_theta logL* instrument_* logprice_base* clusterId, by(year sectorId originId)

*Only the base instruments (instrument_base, instrument_pure_base) are used from here on
drop instrument_low instrument_pure_low instrument_high instrument_pure_high
save "temp.dta", replace

*Reshaping prices: the wide logprice_base* columns become one long logprice_base column,
*with the numeric suffix of each column stored in sectorId.
*reshape requires originId-year to be unique after duplicates drop and stops with an error otherwise.
keep originId year logprice_base*
duplicates drop
reshape long logprice_base, i(originId year) j(sectorId)

*Attach the collapsed regression variables to the long price data.
*NOTE(review): the _merge result is not inspected here - confirm that every row matches.
merge 1:1 originId sectorId year using "temp.dta"

****************************************************
*Correlation Analysis: Prices and Productivity
******************************************************

*Build the _I* indicator variables for sector and origin; they enter every regression below
xi i.sectorId i.originId

*Correlation between consumer prices and residuals (exogenous productivities)
local depvar logX_theta
local endog c.logL#sectorId
local excluded c.instrument_base#sectorId

*IV regression: sector-specific logL slopes instrumented by sector-specific instrument_base slopes
ivreg2 `depvar' (`endog' = `excluded') _I*, cluster(clusterId)

*The residual from this regression is the exogenous productivity measure
predict fitted_values, xb
gen residuals = logX_theta - fitted_values

*Regress prices on that residual, absorbing sector-year and origin-year effects
reghdfe logprice_base residuals, absorb(sectorId#year originId#year) cluster(clusterId)

*Translating to Pearson correlation coefficient:
*partial the _I* indicators out of both variables, scale each residual by its
*standard deviation, then regress one standardized residual on the other.
regress logprice_base _I*, vce(cluster clusterId)
predict fitted_values_p, xb
gen residuals_p = logprice_base - fitted_values_p

regress residuals _I*, vce(cluster clusterId)
predict fitted_values_r, xb
gen residuals_r = residuals - fitted_values_r

egen sd_residuals_p = sd(residuals_p)
egen sd_residuals_r = sd(residuals_r)
replace residuals_p = residuals_p/sd_residuals_p
replace residuals_r = residuals_r/sd_residuals_r

regress residuals_p residuals_r, vce(cluster clusterId)


*Correlation between consumer prices and total productivity (endogenous + exogenous)

*Only the dependent variable is needed in this section: logX_theta is regressed on the
*_I* indicators alone (no logL terms and no instruments), so the residual keeps both components.
local dvars logX_theta

regress `dvars' _I*, cluster(clusterId)
predict fitted_valuesz, xb
gen residualsz = logX_theta - fitted_valuesz

*Regress prices on that residual, absorbing sector-year and origin-year effects
reghdfe logprice_base residualsz, cluster(clusterId) absorb(sectorId#year originId#year)

*Translating to Pearson correlation coefficient:
*partial the _I* indicators out of both variables, scale each residual by its
*standard deviation, then regress one standardized residual on the other.
regress logprice_base _I*, cluster(clusterId)
predict fitted_values_pz, xb
gen residuals_pz = logprice_base - fitted_values_pz
regress residualsz _I*, cluster(clusterId)
predict fitted_values_rz, xb
gen residuals_rz = residualsz - fitted_values_rz
egen sd_residuals_pz = sd(residuals_pz)
egen sd_residuals_rz = sd(residuals_rz)
replace residuals_rz = residuals_rz/sd_residuals_rz
replace residuals_pz = residuals_pz/sd_residuals_pz

regress residuals_pz residuals_rz, cluster(clusterId)

*The price variables (long and wide) are not referenced again in this file
drop logprice_base*

*Regression of logL_share on instrument_pure_base with the _I* indicators as controls.
*NOTE(review): its output is not used by any later command here; presumably a diagnostic - confirm.
regress logL_share instrument_pure_base _I*


*Now doing first stage plots

*Step 1: construct and residualize log l_ik (interacted with sector dummies) and instruments

*Sector-specific copies of logL and instrument_base: equal to the original value in
*sector s and 0 everywhere else. A system-missing value within sector s is also set to 0.
*NOTE(review): confirm that treating missing values within a sector as 0 is intended.
forvalues s = 3/17 {
	gen logL`s' = cond(sectorId==`s' & logL!=., logL, 0)
	gen instrument`s' = cond(sectorId==`s' & instrument_base!=., instrument_base, 0)
}

*Partial the _I* indicators out of each sector-specific variable
forvalues s = 3/17 {
	*Residualized labor
	regress logL`s' _I*
	predict fitted_values`s', xb
	gen logL_resid`s' = logL`s' - fitted_values`s'
	drop fitted_values`s'

	*Residualized instrument (fitted_values for the instrument stays in the dataset)
	regress instrument`s' _I*
	predict fitted_values`s', xb
	gen inst_resid`s' = instrument`s' - fitted_values`s'
}


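*Step 1.5 (disabled): exploratory univariate "first stage" regressions and plots.
*The commented-out lines below refer to instrument_resid3 etc., whereas Step 1 names the residualized instruments inst_resid3 etc.; rename before re-enabling.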
*regress logL_resid3 instrument_resid3 if sectorId==3 & year==2010
*twoway (scatter logL_resid3 instrument_resid3 if sectorId==3 & year==2010 ) (lfit logL_resid3 instrument_resid3 if sectorId==3 & year==2010 ), name(xxx3) title("")
*graph save sector3
*twoway (scatter logL_resid7 instrument_resid7 if sectorId==7 & year==2010 ) (lfit logL_resid7 instrument_resid7 if sectorId==7 & year==2010 ), name(xxx7) title("")
*graph save sector7
*twoway (scatter logL_resid8 instrument_resid8 if sectorId==8 & year==2010 ) (lfit logL_resid8 instrument_resid8 if sectorId==8 & year==2010 ), name(xxx8) title("")
*graph save sector8
*twoway (scatter logL_resid9 instrument_resid9 if sectorId==9 & year==2010 ) (lfit logL_resid9 instrument_resid9 if sectorId==9 & year==2010 ), name(xxx9) title("")
*graph save sector9

*Step 2: compute first stage regressions and fitted values

*Remove the pooled instruments so they cannot be matched by the instrument* wildcards used below
drop instrument_base instrument_pure_base

forvalues x=3/17	{
	*Joint first stage on all residualized sector instruments, without a constant
	regress logL_resid`x' inst_resid*, noc
	predict ybar, xb
	*Partial residual: remove the full fit, then add back the own-sector instrument term.
	*The coefficient is looked up by variable name so it does not depend on the
	*position of inst_resid`x' in the coefficient vector.
	gen y_resid`x' = logL_resid`x'-ybar+_b[inst_resid`x']*inst_resid`x'
	drop ybar
	*Check to make sure we recover the same coefficient
	regress y_resid`x' inst_resid`x'
	regress logL`x' instrument* _I*
}

*Note: everything checks out as consistent, same coefficients in all regressions

*Step 3: make plots with only the sector k observations displayed

forvalues s = 3/17 {
	*Exported files are numbered from 1 (sector id minus 2)
	local fileno = `s' - 2
	local insector if sectorId==`s'
	*Scatter plus linear fit of residualized labor on the residualized instrument
	twoway (scatter logL_resid`s' inst_resid`s' `insector', mcolor(gray)) ///
		(lfit logL_resid`s' inst_resid`s' `insector', lcolor(gray)), ///
		ytitle("") xtitle("") legend(off) name(s`s') title("") ///
		graphregion(color(white))
	graph export "FS_fit_sector`fileno'.pdf", replace
}
*graph combine s3 s4 s5 s6 s7 s8 s9 s10 s11 s12 s13 s14 s15 s16 s17, ycommon xcommon

*Free the graph names s3-s17; the reduced form plots reuse them
graph drop _all

*Now doing reduced form plots

*Reduced form: logX_theta on all sector-specific instruments and the _I* indicators
regress logX_theta instrument* _I*
predict ybar, xb

*Build every partial residual right after the regression, before any other command runs,
*and look each coefficient up by variable name. This avoids relying on the position of
*instrument`x' in the coefficient vector or on e() being unchanged by later commands in the loop.
forvalues x=3/17	{
	gen rf_resid`x' = logX_theta-ybar+_b[instrument`x']*instrument`x'
}

forvalues x=3/17	{
	*Exported files are numbered from 1 (sector id minus 2)
	local q=`x'-2
	*Demean the sector instrument within sector; a tempvar avoids clashing with existing variable names
	tempvar sector_mean
	egen `sector_mean'=mean(instrument`x'), by(sectorId)
	gen instrument_dm`x'=instrument`x'-`sector_mean'
	drop `sector_mean'
	twoway (scatter rf_resid`x' instrument_dm`x' if sectorId==`x', mcolor(gray)) ///
		(lfit rf_resid`x' instrument_dm`x' if sectorId==`x', lcolor(gray)), ///
		ytitle("") xtitle("") legend(off) name(s`x') ///
		graphregion(color(white))
	graph export "RF_fit_sector`q'.pdf", replace
}
*graph combine s3 s4 s5 s6 s7 s8 s9 s10 s11 s12 s13 s14 s15 s16 s17, ycommon xcommon




